# This analysis was conducted using R Studio Version 1.1.419 for Mac and R version 3.5.2 for Mac
# Mac OS is Sonoma 14.2.1. Note that the STM results only replicate exactly on Mac. Results with differ on Windows.
# Replication Code for "Austerity and Aggression"
# Richard Clark and Anna M. Meyerrose
sink("log.txt")
rm(list = ls(all = TRUE))
install.packages(pacman)
pacman::p_load(foreign, ggplot2, plm, reshape2, countrycode, sandwich, lmtest, MASS, qdapRegex,
               rworldmap, RColorBrewer, states, mice, VIM, stargazer, margins, clusterSEs, lme4, optimx,
               coefplot, lattice, survey, dplyr, wordcloud, survey, sampleSelection, quanteda, devtools,
               Matrix, ldatuning, topicmodels, readtext, stm, lda, bursts, tidytext, haven,
               tm, readstata13, fastDummies, panelView, igraph, NetMix, dnr, statnet, network, sna, 
               DataCombine, sensemakr, estimatr, tidyverse, manifestoR, sjstats, texreg, rlist, pipeR)
setwd("ENTER DIRECTORY") # set to the relevant directory

####################### Load Data #############
## These are the datasets needed to replicate the analyses below. 
## The code necessary to merge and impute is available upon request.
prelim <- read.csv("prelim.csv") # no lag, no imputation
prelimimp <- read.csv("prelimimp.csv") # no lag, with imputation
prelimlag <- read.csv("prelimlag.csv") # lag, no imputation
prelimimplag <- read.csv("prelimlagimp.csv") # lag, with imputation
memsimplag <- read.csv("memsimp.csv") # imputed for selection model
####################### Data Pre-processing ##########
prelim$dur <- scale(as.numeric(as.character(prelim$dur))) # standardize all data to ease convergence of models
prelim$loansize <- log1p(as.numeric(as.character(prelim$loansize)))
prelim$quota <- scale(log1p(prelim$quota))
prelim$polity2 <- scale(prelim$polity2)
prelim$absidealimportantdiff <- scale(prelim$absidealimportantdiff)
prelim$absidealdiff <- scale(prelim$absidealdiff)
prelim$USaid <- scale(log1p(prelim$USaid))
prelim$DACaid <- scale(log1p(prelim$DACaid))
prelim$gdppc <- scale(prelim$gdppc)
prelim$gdp <- scale(log(prelim$gdp))
prelim$trade <- scale(prelim$trade)
prelim$reserves <- scale(log1p(prelim$reserves))
prelim$fdigdp <- scale(prelim$fdigdp)
prelim$dservexp <- scale(prelim$dservexp)
prelim$dshortexp <- scale(prelim$dshortexp)
prelim$count <- scale(prelim$count)
prelim$ccode <- as.character(prelim$ccode)
prelim$inflation <- scale(prelim$inflation)
prelim$curraccgdp <- scale(prelim$curraccgdp)
prelim$USaid[prelim$ctry %in% c("Portugal", "Ireland", "Cyprus")] <- 0 # code EU countries with missing data as no US aid
prelim$stone <- rowSums(prelim[,c("DEB","FIN","FP","EXT","RTP","SOE","LAB","PRI","SP","POV",
                                        "INS","ENV","OTH")]>0) # create number of categories variable
prelim <- prelim[prelim$year < 2015,] # subset years
prelim <- prelim[-c(1,4,6:8,10)] # subset columns

prelimlag$dur <- scale(as.numeric(as.character(prelimlag$dur))) # standardize all data to ease convergence of models
prelimlag$loansize <- log1p(as.numeric(as.character(prelimlag$loansize)))
prelimlag$quota <- scale(log1p(prelimlag$quota))
prelimlag$polity2 <- scale(prelimlag$polity2)
prelimlag$absidealimportantdiff <- scale(prelimlag$absidealimportantdiff)
prelimlag$absidealdiff <- scale(prelimlag$absidealdiff)
prelimlag$USaid <- scale(log1p(prelimlag$USaid))
prelimlag$DACaid <- scale(log1p(prelimlag$DACaid))
prelimlag$gdppc <- scale(prelimlag$gdppc)
prelimlag$cinc <- scale(prelimlag$cinc)
prelimlag$exec_yrs_in_office <- scale(prelimlag$exec_yrs_in_office)
prelimlag$ethnic_frac <- scale(prelimlag$ethnic_frac)
prelimlag$population_total <- scale(log1p(as.numeric(prelimlag$population_total)))
prelimlag$gdp <- scale(log(prelimlag$gdp))
prelimlag$trade <- scale(prelimlag$trade)
prelimlag$reserves <- scale(log1p(prelimlag$reserves))
prelimlag$fdigdp <- scale(prelimlag$fdigdp)
prelimlag$dservexp <- scale(prelimlag$dservexp)
prelimlag$dshortexp <- scale(prelimlag$dshortexp)
prelimlag$count <- scale(prelimlag$count)
prelimlag$ccode <- as.character(prelimlag$ccode)
prelimlag$inflation <- scale(prelimlag$inflation)
prelimlag$curraccgdp <- scale(prelimlag$curraccgdp)
prelimlag$prop_neoliberal <- scale(prelimlag$prop_neoliberal)
prelimlag$USaid[prelimlag$ctry %in% c("Portugal", "Ireland", "Cyprus")] <- 0 # code EU countries with missing data as no US aid
prelimlag$stone <- rowSums(prelimlag[,c("DEB","FIN","FP","EXT","RTP","SOE","LAB","PRI","SP","POV",
                                    "INS","ENV","OTH")]>0) # create number of categories variable
prelimlag <- prelimlag[prelimlag$year < 2015,]
prelimlag$populist_exec_indicator[is.na(prelimlag$populist_exec_indicator)] <- 0

prelimimp$dur <- scale(as.numeric(as.character(prelimimp$dur))) # standardize all data to ease convergence of models
prelimimp$loansize <- log1p(as.numeric(as.character(prelimimp$loansize)))
prelimimp$quota <- scale(log1p(prelimimp$quota))
prelimimp$polity2 <- scale(prelimimp$polity2)
prelimimp$absidealimportantdiff <- scale(prelimimp$absidealimportantdiff)
prelimimp$absidealdiff <- scale(prelimimp$absidealdiff)
prelimimp$USaid <- scale(log1p(prelimimp$USaid))
prelimimp$DACaid <- scale(log1p(prelimimp$DACaid))
prelimimp$count <- scale(prelimimp$count)
prelimimp$cinc <- scale(prelimimp$cinc)
prelimimp$exec_yrs_in_office <- scale(prelimimp$exec_yrs_in_office)
prelimimp$ethnic_frac <- scale(prelimimp$ethnic_frac)
prelimimp$population_total <- scale(log1p(as.numeric(prelimimp$population_total)))
prelimimp$gdppc <- scale(prelimimp$gdppc)
prelimimp$gdp <- scale(log(prelimimp$gdp))
prelimimp$trade <- scale(prelimimp$trade)
prelimimp$reserves <- scale(log1p(prelimimp$reserves))
prelimimp$fdigdp <- scale(prelimimp$fdigdp)
prelimimp$dservexp <- scale(prelimimp$dservexp)
prelimimp$dshortexp <- scale(prelimimp$dshortexp)
prelimimp$ccode <- as.character(prelimimp$ccode)
prelimimp$inflation <- scale(prelimimp$inflation)
prelimimp$curraccgdp <- scale(prelimimp$curraccgdp)
prelimimp$prop_neoliberal <- scale(prelimimp$prop_neoliberal)
prelimimp$stone <- rowSums(prelimimp[,c("DEB","FIN","FP","EXT","RTP","SOE","LAB","PRI","SP","POV",
                                        "INS","ENV","OTH")]>0) # create number of categories variable
prelimimp <- prelimimp[prelimimp$year < 2015,]
prelimimp$populist_exec_indicator[is.na(prelimimp$populist_exec_indicator)] <- 0 # NA populists are 0

prelimimplag$stone <- rowSums(prelimimplag[,c("DEB","FIN","FP","EXT","RTP","SOE","LAB","PRI","SP","POV",
                                              "INS","ENV","OTH")]>0) # create number of categories variable
imftemp <- prelimimplag[c(2,3,12,58)]
prelimimplag$dur <- scale(as.numeric(as.character(prelimimplag$dur))) # standardize data
prelimimplag$loansize <- log1p(as.numeric(as.character(prelimimplag$loansize)))
prelimimplag$quota <- scale(log1p(prelimimplag$quota))
prelimimplag$polity2 <- scale(prelimimplag$polity2)
prelimimplag$absidealimportantdiff <- scale(prelimimplag$absidealimportantdiff)
prelimimplag$absidealdiff <- scale(prelimimplag$absidealdiff)
prelimimplag$count <- scale(prelimimplag$count)
prelimimplag$USaid <- scale(log1p(prelimimplag$USaid))
prelimimplag$DACaid <- scale(log1p(prelimimplag$DACaid))
prelimimplag$gdppc <- scale(prelimimplag$gdppc)
prelimimplag$cinc <- scale(prelimimplag$cinc)
prelimimplag$exec_yrs_in_office <- scale(prelimimplag$exec_yrs_in_office)
prelimimplag$ethnic_frac <- scale(prelimimplag$ethnic_frac)
prelimimplag$population_total <- scale(log1p(as.numeric(prelimimplag$population_total)))
prelimimplag$gdp <- scale(log(prelimimplag$gdp))
prelimimplag$trade <- scale(prelimimplag$trade)
prelimimplag$reserves <- scale(log1p(prelimimplag$reserves))
prelimimplag$fdigdp <- scale(prelimimplag$fdigdp)
prelimimplag$dservexp <- scale(prelimimplag$dservexp)
prelimimplag$dshortexp <- scale(prelimimplag$dshortexp)
prelimimplag$ccode <- as.character(prelimimplag$ccode)
prelimimplag$inflation <- scale(prelimimplag$inflation)
prelimimplag$curraccgdp <- scale(prelimimplag$curraccgdp)
prelimimplag$prop_neoliberal <- scale(prelimimplag$prop_neoliberal)
prelimimplag2 <- prelimimplag[prelimimplag$PRGF == 0,] # robustness dataset excluding PRGF
prelimimplag <- prelimimplag[prelimimplag$year < 2015,]
prelimimplag2 <- prelimimplag2[prelimimplag2$year < 2015,]
prelimimplag$populist_exec_indicator[is.na(prelimimplag$populist_exec_indicator)] <- 0
prelimimplag2$populist_exec_indicator[is.na(prelimimplag2$populist_exec_indicator)] <- 0

memsimplag$quota <- scale(log1p(memsimplag$quota)) # standardize data
memsimplag$polity2 <- scale(memsimplag$polity2)
memsimplag$absidealimportantdiff <- scale(memsimplag$absidealimportantdiff)
memsimplag$absidealdiff <- scale(memsimplag$absidealdiff)
memsimplag$USaid <- scale(log1p(memsimplag$USaid))
memsimplag$DACaid <- scale(log1p(memsimplag$DACaid))
memsimplag$gdppc <- scale(memsimplag$gdppc)
memsimplag$gdp <- scale(log1p(memsimplag$gdp))
memsimplag$trade <- scale(memsimplag$trade)
memsimplag$reserves <- scale(log1p(memsimplag$reserves))
memsimplag$cinc <- scale(memsimplag$cinc)
memsimplag$exec_yrs_in_office <- scale(memsimplag$exec_yrs_in_office)
memsimplag$ethnic_frac <- scale(memsimplag$ethnic_frac)
memsimplag$population_total <- scale(log1p(as.numeric(memsimplag$population_total)))
memsimplag$fdigdp <- scale(memsimplag$fdigdp)
memsimplag$dservexp <- scale(memsimplag$dservexp)
memsimplag$dshortexp <- scale(memsimplag$dshortexp)
memsimplag$ccode <- as.character(memsimplag$ccode)
memsimplag$inflation <- scale(memsimplag$inflation)
memsimplag$curraccgdp <- scale(memsimplag$curraccgdp)
imftemp <- imftemp %>% group_by(ccode, year) %>% top_n(1, count)
imftemp <- imftemp %>% group_by(ccode, year) %>% top_n(1, stone)
memsimplag2 <- left_join(unique(memsimplag), unique(imftemp), c("ccode", "year"))
tempimf <- aggregate(memsimplag$part, by = list(memsimplag$year), FUN = mean) # create dataset with ratio of member countries receiving support in a year
# this serves as a measure of budget constraint for the IMF
colnames(tempimf) <- c("year", "ratio") # change column names
tempc <- aggregate(memsimplag$part, by = list(memsimplag$ccode), FUN = mean) # create dataset with ratio of years that each country received support for IMF
# this serves as a measure of the likelihood a country will receive support in a year
colnames(tempc) <- c("ccode", "ratio") # change column names
memsimplag$imfratio <- NA # create excludable interaction variable for instrumentation
memsimplag$cratio <- NA # first create empty columns
for (i in 1:6473) {
  for (j in 1:37) {
    memsimplag$imfratio[i] <- ifelse(memsimplag$year[i] == tempimf$year[j], tempimf$ratio[j], 
                                     memsimplag$imfratio[i]) 
  }
} # then write ratios to the selection dataset
for (i in 1:6473) {
  for (j in 1:187) {
    memsimplag$cratio[i] <- ifelse(memsimplag$ccode[i] == tempc$ccode[j], tempc$ratio[j], 
                                     memsimplag$cratio[i]) 
  }
}
memsimplag$inter <- memsimplag$cratio*memsimplag$imfratio # interact the two ratios
memsimplag$inter <- scale(memsimplag$inter)
memsimplag <- memsimplag[memsimplag$year < 2015,]
memsimplag$populist_exec_indicator[is.na(memsimplag$populist_exec_indicator)] <- 0

memsimplag2$imfratio <- memsimplag$imfratio
memsimplag2$cratio <- memsimplag$cratio
memsimplag2$inter <- memsimplag$inter
tempcond <- aggregate(memsimplag2$count, by = list(memsimplag2$ccode), FUN = mean) # create dataset with ratio of years that each country received support for IMF
# this serves as a measure of the likelihood a country will receive support in a year
colnames(tempcond) <- c("ccode", "condratio") # change column names
memsimplag2$condratio <- NA # first create empty columns
for (i in 1:6473) {
  for (j in 1:187) {
    memsimplag2$condratio[i] <- ifelse(memsimplag2$ccode[i] == tempcond$ccode[j], tempcond$condratio[j], 
                                   memsimplag2$condratio[i]) 
  }
}
memsimplag2$intercond <- memsimplag2$condratio*memsimplag2$imfratio # interact the two ratios
memsimplag2$intercond <- scale(memsimplag2$intercond)
memsimplag2 <- memsimplag2[memsimplag2$year < 2015,] # subset to years before 2015

tempstone <- aggregate(memsimplag2$stone, by = list(memsimplag2$ccode), FUN = mean) # create dataset with ratio of years that each country received support for IMF
# this serves as a measure of the likelihood a country will receive support in a year
colnames(tempstone) <- c("ccode", "stoneratio") # change column names
memsimplag2$stoneratio <- NA # first create empty columns
for (i in 1:6473) {
  for (j in 1:187) {
    memsimplag2$stoneratio[i] <- ifelse(memsimplag2$ccode[i] == tempcond$ccode[j], tempstone$stoneratio[j], 
                                      memsimplag2$stoneratio[i]) 
  }
}
memsimplag2$interstone <- memsimplag2$stoneratio*memsimplag2$imfratio # interact the two ratios
memsimplag2$interstone <- scale(memsimplag2$interstone)
memsimplag2 <- memsimplag2[memsimplag2$year < 2015,] # subset to years before 2015

####################### Anecdotes ####
cases <- prelimimplag[prelimimplag$count > 0 & prelimimplag$mids > 0,] # look at cases where there are IMF programs and MIDs
####################### Main Tests (Models 1 and 2 in Table 1) ##################################################
ctryfe <- list(c("Country fixed effects", "Yes", "Yes", "Yes")) # create rows denoting model specifications for regression tables
ctryre <- list(c("Country random effects", "Yes", "Yes"))
yearfe <- list(c("Year fixed effects", "Yes", "Yes", "Yes"))
yearre <- list(c("Year random effects", "Yes", "Yes"))
regionfe <- list(c("Region fixed effects", "Yes", "Yes"))
nb <- list(c("Model type", "Negative binomial", "Negative binomial", "Negative binomial"))
imfp <- list(c("Predicted prob of IMF program", "No", "No", "Yes"))

covs_full <- c("Conditions", "GDPPC", "Trade openness", "Population",
               "CINC", "Major power", "Polity2", "Executive years in office", 
               "Ethnic fractionalization", "UN voting (ideal pt dist from US)") # cov labels for tables

biv1l <- glm.nb(mids ~ count + factor(ccode) + factor(year), prelimlag,
                control=glm.control(maxit=20)) # bivariate with FEs, lag, no imputation
(sebiv1l <- coeftest(biv1l, vcov = vcovHC(biv1l, type = "HC0", cluster = prelim$ccode)))

m1 <- glm.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), prelimimplag,
               control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem1 <- coeftest(m1, vcov = vcovHC(m1, type = "HC0", cluster = prelimimplag$ccode)))

####################### Robustness checks ####
##->## Alternate stringency measure (from Stone 2008)
covs_alt <- c("Categories", "GDPPC", "Trade openness", "Population",
              "CINC", "Major power", "Polity2", "Executive years in office", 
              "Ethnic fractionalization", "UN voting (ideal pt dist from US)")

biv2 <- glm.nb(mids ~ stone + factor(ccode) + factor(year), prelimlag,
                control=glm.control(maxit=20)) # bivariate with FEs, lag, no imputation
(sebiv2 <- coeftest(biv2, vcov = vcovHC(biv2, type = "HC0", cluster = prelim$ccode)))

m2 <- glm.nb(mids ~ stone + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), prelimimplag,
             control=glm.control(maxit=20)) # multivariate with alternate stringency measure
(sem2 <- coeftest(m2, vcov = vcovHC(m2, type = "HC0", cluster = prelimimplag$ccode)))

##->## No imputation
m1d <- glm.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), prelimlag,
             control=glm.control(maxit=20)) # multivariate with FEs, lag
(sem1d <- coeftest(m1d, vcov = vcovHC(m1d, type = "HC0", cluster = prelimlag$ccode)))

stargazer(m1d, se = list(sem1d[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_full,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A12

##->## Austerity conditions only
prelimimplag$austerity <- prelimimplag$RTP + prelimimplag$FP + prelimimplag$PRI + prelimimplag$SOE # restrict to austerity measures from Kentikelenis et al. 2016 data
prelimimplag$austerity <- scale(prelimimplag$austerity) # scale

covs_alt2 <- c("Austerity conditions", "GDPPC", "Trade openness", "Population",
              "CINC", "Major power", "Polity2", "Executive years in office", 
              "Ethnic fractionalization", "UN voting (ideal pt dist from US)")
m3 <- glm.nb(mids ~ austerity + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), prelimimplag,
               control=glm.control(maxit=20)) # multivariate with alternate stringency measure
(sem3 <- coeftest(m3, vcov = vcovHC(m3, type = "HC0", cluster = prelimimplag$ccode)))
stargazer(m3, se = list(sem3[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_alt2,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A13

##->## Region FEs
prelimimplag$region <- countrycode(prelimimplag$ccode, origin = "iso3c", destination = "region")
m4 <- glm.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(region) + factor(year), prelimimplag,
               control=glm.control(maxit=20)) # multivariate with region FEs
(sem4 <- coeftest(m4, vcov = vcovHC(m4, type = "HC0", cluster = prelimimplag$ccode)))
stargazer(m4, se = list(sem4[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_full,
          style = "ajps", omit = c("region", "year"), 
          add.lines = c(regionfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A20

##->## Hierarchical model (ctry and year random effects)
m5 <- glmer.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
                 polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
                 (1|ccode) + (1|year), prelimimplag) # random effects
summary(m5)
stargazer(m5,
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_full,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryre, yearre, nb),
          omit.stat = c("ll", "aic", "theta", "bic")) # Table A19

##->## Drop OECD
oecd <- read.csv("oecd.csv") # load hand-coded data on OECD membership from https://www.oecd.org/about/document/ratification-oecd-convention.htm
oecd$ccode <- countrycode(oecd$ctry, origin = "country.name", destination = "iso3c")
oecd <- merge(unique(prelimimplag), unique(oecd), by = c("ccode", "year"), all.x=T)
oecd$oecd[is.na(oecd$oecd)] <- 0
oecd <- oecd[oecd$oecd == 0,]
m6 <- glm.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), oecd,
             control=glm.control(maxit=20)) # multivariate with alternate stringency measure
(sem6 <- coeftest(m6, vcov = vcovHC(m6, type = "HC0", cluster = oecd$ccode)))
stargazer(m6, se = list(sem6[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_full,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A15

##->## World Bank conditions
wb <- read.csv("Dev_Policy_Action_Database_10.18.csv", fileEncoding="UTF-8-BOM") # load raw data from World Bank's Development Policy Action database, 
# which was obtained via an access to information request in October 2018. Note that I changed the "prsc" column values where a dash was present to "N" to resolve an import error.
wb$ccode <- countrycode(wb$ctry, origin = "country.name", destination = "iso3c") 
wb <- aggregate(wb$count_pa, by = list(wb$ccode, wb$year), FUN = sum)
colnames(wb) <- c("ccode", "year", "count_pa")
wb$year <- wb$year + 1
wb <- merge(unique(prelimimplag), unique(wb), by = c("ccode", "year"), all.x=T)
wb$count_pa[is.na(wb$count_pa)] <- 0
m7 <- glm.nb(mids ~ count_pa + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), wb,
             control=glm.control(maxit=20)) # multivariate with alternate stringency measure
(sem7 <- coeftest(m7, vcov = vcovHC(m7, type = "HC0", cluster = wb$ccode)))
stargazer(m7, se = list(sem7[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_full,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A24

##->## Interaction with casualties
mids <- read.csv("MIDB 5.0.csv", fileEncoding="UTF-8-BOM") # Load MIDs incident-level dataset downloaded 9/21/21
mids <- mids[mids$orig == 1,] # subset to cases where state initiates
mids$mid <- 1
colnames(mids)[6] <- "year"
mids <- aggregate(list(mids$hostlev, mids$fatality), by = list(mids$ccode, mids$year), FUN = sum)
colnames(mids) <- c("ccode", "year", "hostlevel", "fatality")
mids$ccode <- countrycode(mids$ccode, origin = "cown", destination = "iso3c")
CY <- merge(unique(prelimimplag), unique(mids), by = c("ccode", "year"), all.x=T) # merge wars into shell
CY$hostlevel[is.na(CY$hostlevel)] <- 0 # change NA war to 0
CY$fatality[is.na(CY$fatality)] <- 0 # change NA war to 0
CY$lowfatality <- ifelse(CY$fatality < 1 & CY$mids > 0, 1, 0)
CY$highfatality <- ifelse(CY$fatality > 0 & CY$mids > 0, 1, 0)
m1 <- glm.nb(lowfatality ~ count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), CY,
             control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem1 <- coeftest(m1, vcov = vcovHC(m1, type = "HC0", cluster = prelimimplag$ccode)))

m2 <- glm.nb(highfatality ~ count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), CY,
             control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem2 <- coeftest(m2, vcov = vcovHC(m2, type = "HC0", cluster = prelimimplag$ccode)))

stargazer(m1, m2, se = list(sem1[,2], sem2[,2]), 
          dep.var.labels = c("MIDs w/o casualties", "MIDs with casualties"),
          covariate.labels = covs_full,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table 3

##->## RFA test
rfa <- read.csv("RFA_ctry.csv", fileEncoding="UTF-8-BOM") # load data from Clark 2022 "Bargain Down or Shop Around" in JOP
rfa$ccode <- countrycode(rfa$ctry, origin = "country.name", destination = "iso3c") 
rfa$year <- rfa$year + 1
rfa <- merge(unique(prelimimplag), unique(rfa), by = c("ccode", "year"), all.x=T)
rfa$ooamt[is.na(rfa$ooamt)] <- 0
m8 <- glm.nb(mids ~ I(ooamt > 0) + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), rfa,
             control=glm.control(maxit=20)) # multivariate with alternate stringency measure
(sem8 <- coeftest(m8, vcov = vcovHC(m8, type = "HC0", cluster = rfa$ccode)))
stargazer(m8, se = list(sem8[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_full,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A25

##->## Additional covariates test 
fragile <- read.csv("fragile_wb.csv") # load World Bank data on fragile states hand-coded from reports https://www.worldbank.org/en/topic/fragilityconflictviolence/brief/harmonized-list-of-fragile-situations
fragile$ccode <- countrycode(fragile$ctry, origin = "country.name", destination = "iso3c")
fragile <- fragile[c(2:4)]
fragile$year <- fragile$year + 1
prelimimplagf <- left_join(prelimimplag, fragile, by = c("ccode", "year")) # merge
prelimimplagf$fragile[is.na(prelimimplagf$fragile)] <- 0 # replace NA with 0
troops <- data.frame(read_dta("AklinKernData.dta", encoding="latin1")) # load financial crisis data from Aklin and Kern (2019)
troops$ccode <- countrycode(troops$countryname, origin = "country.name", destination = "iso3c") # add ccode
troops <- troops[c(2,31,32)] # subset
troops$year <- troops$year + 1 # lag
prelimimplagf <- left_join(unique(prelimimplagf), unique(troops), by = c("ccode", "year")) # merge
prelimimplagf$crisis_any2[is.na(prelimimplagf$crisis_any2)] <- 0

m9 <- glm.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff + USaid +
               curraccgdp + fdigdp + inflation + fragile + crisis_any2 + lag(mids) +
               factor(ccode) + factor(year), prelimimplagf,
             control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem9 <- coeftest(m9, vcov = vcovHC(m9, type = "HC0", cluster = prelimimplagf$ccode)))

stargazer(m9, se = list(sem9[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = c(covs_full, "U.S. aid", "Current account / GDP", "FDI / GDP", "Inflation", 
                               "Fragile state", "Financial crisis", "Lagged MIDs"),
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A18

##->## Alt exp: US troop deployments
troops <- data.frame(read_dta("AklinKernData.dta", encoding="latin1")) # load US troops data from Aklin and Kern (2019)
troops$ccode <- countrycode(troops$countryname, origin = "country.name", destination = "iso3c") # add ccode
troops <- troops[c(2,31,17)] # subset
troops$year <- troops$year + 1 # lag
prelimimplagt <- left_join(unique(prelimimplag), unique(troops), by = c("ccode", "year")) # merge
prelimimplagt$ustroops[is.na(prelimimplagt$ustroops)] <- 0 # replace NA with 0
prelimimplagt$ustroops <- log1p(prelimimplagt$ustroops)
prelimimplagt <- prelimimplagt[prelimimplagt$year < 2011,] # restrict to years coded by Aklin and Kern

m10 <- glm.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff + ustroops +
               factor(ccode) + factor(year), prelimimplagt,
             control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem10 <- coeftest(m10, vcov = vcovHC(m10, type = "HC0", cluster = prelimimplagt$ccode)))

stargazer(m10, se = list(sem10[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = c(covs_full, "U.S. troops"),
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A27

##->## Only enforced conditions
IMF <- read.csv("IMF_cond.csv", fileEncoding="UTF-8-BOM") # Load the conditions tab from Kentikelenis et al. (2016) downloaded in October 2018
IMF$count <- 1 # create a counter variable equal to 1 for each condition
IMF$Waiver[is.na(IMF$Waiver)] <- 0 # Recode no waiver as 0
IMF$Waiver[IMF$Waiver > 0] <- 1 # Recode all waivers as 1
for (i in 1:32261){
  IMF$DEB[i] <- ifelse(IMF$Policy.Area..short.[i] == "DEB", 1, 0)
  IMF$FIN[i] <- ifelse(IMF$Policy.Area..short.[i] == "FIN", 1, 0)
  IMF$FP[i] <- ifelse(IMF$Policy.Area..short.[i] == "FP", 1, 0)
  IMF$EXT[i] <- ifelse(IMF$Policy.Area..short.[i] == "EXT", 1, 0)
  IMF$RTP[i] <- ifelse(IMF$Policy.Area..short.[i] == "RTP", 1, 0)
  IMF$SOE[i] <- ifelse(IMF$Policy.Area..short.[i] == "SOE", 1, 0)
  IMF$LAB[i] <- ifelse(IMF$Policy.Area..short.[i] == "LAB", 1, 0)
  IMF$PRI[i] <- ifelse(IMF$Policy.Area..short.[i] == "PRI", 1, 0)
  IMF$SP[i] <- ifelse(IMF$Policy.Area..short.[i] == "SP", 1, 0)
  IMF$POV[i] <- ifelse(IMF$Policy.Area..short.[i] == "POV", 1, 0)
  IMF$INS[i] <- ifelse(IMF$Policy.Area..short.[i] == "INS", 1, 0)
  IMF$ENV[i] <- ifelse(IMF$Policy.Area..short.[i] == "ENV", 1, 0)
  IMF$OTH[i] <- ifelse(IMF$Policy.Area..short.[i] == "OTH", 1, 0)
} # code each condition by policy category
IMFagg <- aggregate(list(IMF$count, IMF$DEB, IMF$FIN, IMF$FP, IMF$EXT, IMF$RTP, IMF$SOE, IMF$LAB, 
                         IMF$PRI, IMF$SP, IMF$POV, IMF$INS, IMF$ENV, IMF$OTH, IMF$Waiver), by = list(IMF$Country.Name, 
                                                                                                     IMF$Country.Code, 
                                                                                                     IMF$Arrangement.Date, 
                                                                                                     IMF$Arrangement.ID, 
                                                                                                     IMF$Year, 
                                                                                                     IMF$Arrangement.Duration,
                                                                                                     IMF$Arrangement.Type,
                                                                                                     IMF$Arrangement.Amount..SDR), 
                    FUN = sum) # aggregate conditions by category and summing by project

colnames(IMFagg) <- c("ctry", "ccode", "date", "ID", "year", "dur", "type", "loansize", "count", "DEB", "FIN", "FP", "EXT", "RTP", 
                      "SOE", "LAB", "PRI", "SP", "POV", "INS", "ENV", "OTH", "waiver") # change column names
waive <- IMFagg[c(2,5,9,23)]
waive$count_enforced <- waive$count-waive$waiver
waive <- waive[c(1,2,5)]
waive <- aggregate(waive$count_enforced, by = list(waive$ccode, waive$year), FUN = max) # eliminate multiple programs per year
colnames(waive) <- c("ccode", "year", "count_enforced")
prelimimplagw <- left_join(unique(prelimimplag), unique(waive), by = c("ccode", "year"))
prelimimplagw$count_enforced[is.na(prelimimplagw$count_enforced)] <- 0
prelimimplagw$count_enforced <- scale(prelimimplagw$count_enforced)

r11 <- glm.nb(mids ~ count_enforced + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), prelimimplagw,
             control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(se11 <- coeftest(r11, vcov = vcovHC(r11, type = "HC0", cluster = prelimimplagw$ccode)))

stargazer(r11, se = list(se11[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = c(covs_full),
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A14

##->## PRGF control
r12 <- glm.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
                polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
                PRGF + factor(ccode) + factor(year), prelimimplag,
              control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(se12 <- coeftest(r12, vcov = vcovHC(r12, type = "HC0", cluster = prelimimplag$ccode)))

stargazer(r12, se = list(se12[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = c(covs_full, "PRGF"),
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A16

##->## Only IMF program participants
IMFaggp <- IMFagg[c(2,5)]
IMFaggp$part <- 1
IMFaggp$year <- IMFaggp$year + 1
imfprog <- left_join(prelimimplag, unique(IMFaggp), c("ccode", "year"))
imfprog$part[is.na(imfprog$part)] <- 0

r13 <- glm.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
                polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
                factor(ccode), imfprog[imfprog$part==1,],
              control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(se13 <- coeftest(r13, vcov = vcovHC(r13, type = "HC0", cluster = imfprog[imfprog$part==1,]$ccode)))

stargazer(r13, se = list(se13[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = c(covs_full),
          style = "ajps", omit = c("ccode"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A17

####################### Interaction models ####
interaction_plot_binary <- function(model, effect, moderator, interaction, varcov="default", conf=.95, title="Marginal effects plot", xlabel="Value of moderator", ylabel="Estimated marginal coefficient", factor_labels=c(0,1)){
  
  # Extract Variance Covariance matrix
  if (varcov == "default"){
    covMat = vcov(model)
  }else{
    covMat = varcov
  }
  
  # Extract the data frame of the model
  mod_frame = model.frame(model)
  
  # Get coefficients of variables
  beta_1 = model$coefficients[[effect]]
  beta_3 = model$coefficients[[interaction]]
  
  # Create list of moderator values at which marginal effect is evaluated
  x_2 <- c(0,1)
  
  # Compute marginal effects
  delta_1 = beta_1 + beta_3*x_2
  
  # Compute variances
  var_1 = covMat[effect,effect] + (x_2^2)*covMat[interaction, interaction] + 2*x_2*covMat[effect, interaction]
  
  # Standard errors
  se_1 = sqrt(var_1)
  
  # Upper and lower confidence bounds
  z_score = qnorm(1 - ((1 - conf)/2))
  upper_bound = delta_1 + z_score*se_1
  lower_bound = delta_1 - z_score*se_1
  
  # Determine the bounds of the graphing area
  max_y = max(upper_bound)
  min_y = min(lower_bound)
  
  # Initialize plotting window
  plot(x=c(), y=c(), ylim=c(min_y, max_y), xlim=c(-.5, 1.5), xlab=xlabel, ylab=ylabel, main=title, xaxt="n")
  
  # Plot points of estimated effects
  points(x=x_2, y=delta_1, pch=16)
  
  # Plot lines of confidence intervals
  lines(x=c(x_2[1], x_2[1]), y=c(upper_bound[1], lower_bound[1]), lty=1)
  points(x=c(x_2[1], x_2[1]), y=c(upper_bound[1], lower_bound[1]), pch=c(25,24), bg="black")
  lines(x=c(x_2[2], x_2[2]), y=c(upper_bound[2], lower_bound[2]), lty=1)
  points(x=c(x_2[2], x_2[2]), y=c(upper_bound[2], lower_bound[2]), pch=c(25,24), bg="black")
  
  # Label the axis
  axis(side=1, at=c(0,1), labels=factor_labels)
  
  # Add a dashed horizontal line for zero
  abline(h=0, lty=3)
  
}
m2int <- glm.nb(mids ~ count*polity2 + gdppc + 
                  trade + population_total + cinc + major_power +
                  exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
                  factor(ccode) + factor(year), prelimimplag,
                control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem2int <- coeftest(m2int, vcov = vcovHC(m2int, type = "HC0", cluster = prelimimplag$ccode)))
interaction_plot_continuous <- function(model, effect, moderator, interaction, varcov="default", minimum="min", maximum="max", incr="default", num_points = 50, conf=.95, mean=FALSE, median=FALSE, alph=80, rugplot=T, histogram=T, title="Marginal effects plot", xlabel="Value of moderator", ylabel="Estimated marginal coefficient"){
  
  # Define a function to make colors transparent
  makeTransparent<-function(someColor, alpha=alph){
    newColor<-col2rgb(someColor)
    apply(newColor, 2, function(curcoldata){rgb(red=curcoldata[1], green=curcoldata[2],
                                                blue=curcoldata[3],alpha=alpha, maxColorValue=255)})
  }
  
  # Extract Variance Covariance matrix
  if (varcov == "default"){
    covMat = vcov(model)
  }else{
    covMat = varcov
  }
  
  # Extract the data frame of the model
  mod_frame = model.frame(model)
  
  # Get coefficients of variables
  beta_1 = model$coefficients[[effect]]
  beta_3 = model$coefficients[[interaction]]
  
  # Set range of the moderator variable
  # Minimum
  if (minimum == "min"){
    min_val = min(mod_frame[[moderator]])
  }else{
    min_val = minimum
  }
  # Maximum
  if (maximum == "max"){
    max_val = max(mod_frame[[moderator]])
  }else{
    max_val = maximum
  }
  
  # Check if minimum smaller than maximum
  if (min_val > max_val){
    stop("Error: Minimum moderator value greater than maximum value.")
  }
  
  # Determine intervals between values of the moderator
  if (incr == "default"){
    increment = (max_val - min_val)/(num_points - 1)
  }else{
    increment = incr
  }
  
  # Create list of moderator values at which marginal effect is evaluated
  x_2 <- seq(from=min_val, to=max_val, by=increment)
  
  # Compute marginal effects
  delta_1 = beta_1 + beta_3*x_2
  
  # Compute variances
  var_1 = covMat[effect,effect] + (x_2^2)*covMat[interaction, interaction] + 2*x_2*covMat[effect, interaction]
  
  # Standard errors
  se_1 = sqrt(var_1)
  
  # Upper and lower confidence bounds
  z_score = qnorm(1 - ((1 - conf)/2))
  upper_bound = delta_1 + z_score*se_1
  lower_bound = delta_1 - z_score*se_1
  
  # Determine the bounds of the graphing area
  max_y = max(upper_bound)
  min_y = min(lower_bound)
  
  # Make the histogram color
  hist_col = makeTransparent("grey")
  
  # Initialize plotting window
  plot(x=c(), y=c(), ylim=c(min_y, max_y), xlim=c(min_val, max_val), xlab=xlabel, ylab=ylabel, main=title)
  
  # Plot estimated effects
  lines(y=delta_1, x=x_2)
  lines(y=upper_bound, x=x_2, lty=2)
  lines(y=lower_bound, x=x_2, lty=2)
  
  # Add a dashed horizontal line for zero
  abline(h=0, lty=3)
  
  # Add a vertical line at the mean
  if (mean){
    abline(v = mean(mod_frame[[moderator]]), lty=2, col="red")
  }
  
  # Add a vertical line at the median
  if (median){
    abline(v = median(mod_frame[[moderator]]), lty=3, col="blue")
  }
  
  # Add Rug plot
  if (rugplot){
    rug(mod_frame[[moderator]])
  }
  # Add Histogram (Histogram only plots when minimum and maximum are the min/max of the moderator)
  if (histogram & minimum=="min" & maximum=="max"){
    par(new=T)
    hist(mod_frame[[moderator]], axes=F, xlab="", ylab="",main="", border=hist_col, col=hist_col)
  }
}
interaction_plot_continuous(m2int, "count", "polity2", "count:polity2",
                            xlab="Polity2 (standardized)", ylab="Conditions", title="") # Figure A8

pond <- read_dta("Pond_II2017_Data.dta") # load Pond (2018) data on labor rights
pond <- pond[c(1,2,13,18)] # subset
colnames(pond) <- c("year", "ccode", "laborrights", "labortimespolity")
pond <- left_join(prelimimplag, pond, by = c("ccode", "year"), all.x=T)
pond$laborrights <- scale(pond$laborrights)

m3int <- glm.nb(mids ~ count*laborrights + gdppc + 
                  trade + population_total + cinc + major_power +
                  polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
                  factor(ccode) + factor(year), pond,
                control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem3int <- coeftest(m3int, vcov = vcovHC(m3int, type = "HC0", cluster = pond$ccode)))
interaction_plot_continuous(m3int, "count", "laborrights", "count:laborrights",
                            xlab="Labor rights (standardized)", ylab="Conditions", title="") # Figure A9

covs_int <- c("Conditions", "Labor rights", "Polity2", "GDPPC", "Trade openness", "Population",
               "CINC", "Major power", "Executive years in office", 
               "Ethnic fractionalization", "UN voting (ideal pt dist from US)", "Conditions X Polity2", 
              "Conditions X Labor rights")
stargazer(m2int, m3int, se=list(sem2int[,2], sem3int[,2]),
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_int,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A26

##->## Interacting UN votes and US troops with conditions
covs_int <- c("US troops", "UN voting (ideal pt dist from US)", 
              "Conditions", "GDPPC", "Trade openness", "Population",
              "CINC", "Major power", "Polity2", "Executive years in office", 
              "Ethnic fractionalization", "Conditions X UN voting", 
              "Conditions X US troops")
m4int <- glm.nb(mids ~  absidealimportantdiff*count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac +
               factor(ccode) + factor(year), prelimimplag,
             control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem4int <- coeftest(m4int, vcov = vcovHC(m4int, type = "HC0", cluster = prelimimplag$ccode)))
interaction_plot_continuous(m4int, "count", "absidealimportantdiff", "absidealimportantdiff:count",
                            xlab="UN voting (dist from US)", ylab="Conditions", title="") # Figure 4

m5int <- glm.nb(mids ~  ustroops*count + gdppc + trade + population_total + cinc + major_power +
                  polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
                  factor(ccode) + factor(year), prelimimplagt,
                control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem5int <- coeftest(m5int, vcov = vcovHC(m5int, type = "HC0", cluster = prelimimplagt$ccode)))
interaction_plot_continuous(m5int, "count", "ustroops", "ustroops:count",
                            xlab="US troops", ylab="Conditions", title="") # Figure 5

stargazer(m4int, m5int, se = list(sem4int[,2], sem5int[,2]), 
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_int,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table A28

####################### Selection Adjustment (Model 3 in Table 1 + Robustness + Generation of Tables 1 and 2) ##########
c2 <- c("Budget constraint X Participation rate", "Quota", "Time to last IMF program", "Polity2", 
        "Reserves", "GDPPC", "Current account / GDP", "UNSC member", "U.S. aid", "DAC aid", 
        "UN voting", "FDI / GDP", "Inflation", "Openness", "Debt service / exports", 
        "Short-term debt / exports", "War", "Election year") # covariate labels for first stage
prob <- list(c("Model type", "Probit"))
## First stage selection model (program)

partreg <- glm(part ~ inter + quota + last + polity2 + reserves + gdppc + curraccgdp +
                 unsc + USaid + DACaid + absidealdiff + fdigdp + inflation + trade + dservexp + dshortexp +
                 elec + factor(year) + factor(ccode), family = binomial(link = "probit"), 
               data = memsimplag, control=glm.control(maxit=200))
(separt <- coeftest(partreg, vcov = vcovHC(partreg, type = "HC0", cluster = memsimplag$ccode))) # binomial probit with instrument

partregsub <- glm(part ~ quota + last + polity2 + reserves + gdppc + curraccgdp +
                 unsc + USaid + DACaid + absidealdiff + fdigdp + inflation + trade + dservexp + dshortexp +
                 elec + factor(year) + factor(ccode), family = binomial(link = "probit"), 
               data = memsimplag, control=glm.control(maxit=200))
(separtsub <- coeftest(partregsub, vcov = vcovHC(partregsub, type = "HC0", cluster = memsimplag$ccode))) # delete instrument for weak instrument test

memsimplag2 <- memsimplag2[!is.na(memsimplag2$inter),] # eliminate observations with NA interaction
memsimplag2$fit <- fitted(partreg) # add predicted probabilities to data frame

## Merge predicted probabilities into selection data

fitdat <- cbind(as.character(memsimplag2$ccode), memsimplag2$year, memsimplag2$fit) # extract probabilities
fitdat <- unique(fitdat) # eliminate duplicates
colnames(fitdat) <- c("ccode", "year", "fit") # rename columns
prelimimplag4 <- merge(prelimimplag, fitdat, c("ccode", "year")) # merge data
prelimimplag4 <- unique(prelimimplag4) # eliminate duplicates
prelimimplag4$fit <- as.numeric(as.character(prelimimplag4$fit)) # change data format

stargazer(partreg, se = list(separt[,2]),
          dep.var.labels = c("Participation in IMF program"),
          covariate.labels = c2,
          style = "ajps", omit = c("ccode", "year"), 
          add.lines = c(ctryfe, yearfe, prob),
          omit.stat = c("ll", "aic", "theta")) # Table A22

## First stage selection model (conditions)

c3 <- c("Budget constraint X Average conditions", 
        "Budget constraint X Average categories",
        "Budget constraint X Participation rate", 
        "Quota", "Time to last IMF program", "Polity2", 
        "Reserves", "GDPPC", "Current account / GDP", "UNSC member", "U.S. aid", "DAC aid", 
        "UN voting", "FDI / GDP", "Inflation", "Openness", "Debt service / exports", 
        "Short-term debt / exports", "Election year") # covariate labels for first stage

partreg2 <- glm.nb(count ~ intercond + inter + quota + last + polity2 + reserves + gdppc + curraccgdp +
                 unsc + USaid + DACaid + absidealdiff + fdigdp + inflation + trade + dservexp + dshortexp +
                 elec + factor(year) + factor(ccode), 
               data = memsimplag2, control=glm.control(maxit=50))
(separt2 <- coeftest(partreg2, vcov = vcovHC(partreg2, type = "HC0", cluster = memsimplag2$ccode))) # binomial probit with instrument

partregsub2 <- glm.nb(count ~ quota + last + polity2 + reserves + gdppc + curraccgdp +
                    unsc + USaid + DACaid + absidealdiff + fdigdp + inflation + trade + dservexp + dshortexp +
                    elec + factor(year) + factor(ccode), 
                  data = memsimplag2, control=glm.control(maxit=50))
(separtsub2 <- coeftest(partregsub2, vcov = vcovHC(partregsub2, type = "HC0", cluster = memsimplag2$ccode))) # delete instrument for weak instrument test

memsimplag2 <- memsimplag2[!is.na(memsimplag2$intercond),] # eliminate observations with NA interaction
memsimplag2$fitcond <- fitted(partreg2) # add predicted probabilities to data frame

partreg3 <- glm.nb(stone ~ interstone + inter + quota + last + polity2 + reserves + gdppc + curraccgdp +
                     unsc + USaid + DACaid + absidealdiff + fdigdp + inflation + trade + dservexp + dshortexp +
                     elec + factor(year) + factor(ccode), 
                   data = memsimplag2, control=glm.control(maxit=50))
(separt3 <- coeftest(partreg3, vcov = vcovHC(partreg3, type = "HC0", cluster = memsimplag2$ccode))) # binomial probit with instrument

partregsub3 <- glm.nb(stone ~ quota + last + polity2 + reserves + gdppc + curraccgdp +
                        unsc + USaid + DACaid + absidealdiff + fdigdp + inflation + trade + dservexp + dshortexp +
                        elec + factor(year) + factor(ccode), 
                      data = memsimplag2, control=glm.control(maxit=50))
(separtsub3 <- coeftest(partregsub3, vcov = vcovHC(partregsub3, type = "HC0", cluster = memsimplag2$ccode))) # delete instrument for weak instrument test

memsimplag2 <- memsimplag2[!is.na(memsimplag2$interstone),] # eliminate observations with NA interaction
memsimplag2$fitstone <- fitted(partreg3) # add predicted probabilities to data frame

## Merge predicted probabilities into selection data

fitdat <- cbind(as.character(memsimplag2$ccode), memsimplag2$year, memsimplag2$fitstone, memsimplag2$fitcond) # extract probabilities
fitdat <- unique(fitdat) # eliminate duplicates
colnames(fitdat) <- c("ccode", "year", "fitstone", "fitcond") # rename columns
prelimimplag4 <- merge(prelimimplag4, fitdat, c("ccode", "year")) # merge data
prelimimplag4 <- unique(prelimimplag4) # eliminate duplicates
prelimimplag4$fitstone <- as.numeric(as.character(prelimimplag4$fitstone)) # change data format
prelimimplag4$fitcond <- as.numeric(as.character(prelimimplag4$fitcond))

stargazer(partreg2, partreg3, se = list(separt2[,2], separt3[,2]),
          dep.var.labels = c("Conditions", "Categories"),
          covariate.labels = c3,
          style = "ajps", omit = c("ccode", "year", "Constant"), 
          add.lines = c(ctryfe, yearfe, prob),
          omit.stat = c("ll", "aic", "theta")) # Table A23

## Second stage 
m1i <- glm.nb(mids ~ fit + fitcond + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), prelimimplag4,
             control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem1i <- coeftest(m1i, vcov = vcovHC(m1i, type = "HC0", cluster = prelimimplag4$ccode)))

m2i <- glm.nb(mids ~ fit + fitstone + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), prelimimplag4,
             control=glm.control(maxit=20)) # multivariate with alternate stringency measure
(sem2i <- coeftest(m2i, vcov = vcovHC(m2i, type = "HC0", cluster = prelimimplag4$ccode)))

covs_fulli <- c("Conditions", "Conditions", "GDPPC", "Trade openness", "Population",
               "CINC", "Major power", "Polity2", "Executive years in office", 
               "Ethnic fractionalization", "UN voting (ideal pt dist from US)")

m1 <- glm.nb(mids ~ count + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), prelimimplag,
             control=glm.control(maxit=20)) # multivariate with FEs, lag, imputation
(sem1 <- coeftest(m1, vcov = vcovHC(m1, type = "HC0", cluster = prelimimplag$ccode)))

m2 <- glm.nb(mids ~ stone + gdppc + trade + population_total + cinc + major_power +
               polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
               factor(ccode) + factor(year), prelimimplag,
             control=glm.control(maxit=20)) # multivariate with alternate stringency measure
(sem2 <- coeftest(m2, vcov = vcovHC(m2, type = "HC0", cluster = prelimimplag$ccode)))

stargazer(biv1l, m1, m1i, se = list(sebiv1l[,2], sem1[,2], sem1i[,2]), 
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_fulli,
          style = "ajps", keep = c("count", "fitcond", "gdppc", "trade", 
                            "population_total", "cinc", "major_power",
                            "polity2", "exec_yrs_in_office", 
                            "ethnic_frac", "absidealimportantdiff"),
          add.lines = c(imfp, ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table 1

stargazer(biv2, m2, m2i, se = list(sebiv2[,2], sem2[,2], sem2i[,2]), 
          dep.var.labels = c("Number of MIDs"),
          covariate.labels = covs_fulli,
          style = "ajps", keep = c("stone", "fitstone", "gdppc", "trade", 
                                   "population_total", "cinc", "major_power",
                                   "polity2", "exec_yrs_in_office", 
                                   "ethnic_frac", "absidealimportantdiff"),
          add.lines = c(imfp, ctryfe, yearfe, nb),
          omit.stat = c("ll", "aic", "theta")) # Table 2

## Weak instrument test
waldtest(partreg, partregsub) # F = 29
waldtest(partreg2, partregsub2) # F = 4.3 p = 0.01
waldtest(partreg3, partregsub3) # F = 4.2 p = 0.01

####################### Descriptive plots ##################################################
prelimimp <- read.csv("prelimimp.csv") # reload unscaled data
map <- as.data.frame(cbind(as.character(prelimimp$ccode), as.character(prelimimp$ctry), prelimimp$year, prelimimp$mids)) # prep data for map of average conditionality burdens
colnames(map) <- c("ccode", "ctry","year", "mids") # change col names
map$ctry <- countrycode(map$ccode, origin = "iso3c", destination = "country.name")
map$mids <- as.numeric(as.character(map$mids)) # change format
map <- aggregate(list(map$mids), by = list(map$ccode, map$ctry), mean) # aggregate into mean midss
colnames(map) <- c("ccode", "ctry", "mids") # change col names
maps <- joinCountryData2Map(map, joinCode = "ISO3", nameJoinColumn = "ccode", nameCountryColumn = "ctry") # assign data to map
maps <- subset(maps, continent != "Antarctica")
mapss <- mapCountryData(mapToPlot = maps, nameColumnToPlot = "mids", mapRegion = "world", 
                        catMethod = "quantiles", colourPalette = brewer.pal(11, name = "Reds"),
                        addLegend = FALSE, mapTitle = "")
do.call( addMapLegend, c(mapss, legendWidth=0.5, legendMar = 5))
maps2 <- joinCountryData2Map(map, joinCode = "ISO3", nameJoinColumn = "ccode", nameCountryColumn = "ctry") # Figure A6

mnp <- aggregate(prelimimp$mids, by = list(prelimimp$year), mean) # aggregate data into average counts over time
plot(mnp, type = "h", xlab = "Year", ylab = "Average Number of MIDs",
     cex.lab=1.5, cex.axis=1.5) # plot average mids over time
barplot(mnp$x, names.arg = mnp$Group.1, xlab = "Year", ylab = "Average Number of MIDs",
        cex.lab = 1.5) # Figure A7

####################### Descriptive stats ########################
covs_desc <- covs_full # covariate labels
covs_full <- c("Number of MIDs", "Conditions", "GDPPC", "Trade openness", "Population",
               "CINC", "Major power", "Polity2", "Executive years in office", 
               "Ethnic fractionalization")
desc <- read.csv("prelimimp.csv") # reload unscaled data
desc <- desc[desc$year < 2015,]
desc <- desc[,c(5,12,29,32,51,52,53,42,54,55)]
stargazer(desc, omit.summary.stat = c("p25", "p75"), digits = 2, style = "ajps",
          covariate.labels = covs_desc) # Table A10

desc <- read.csv("prelimlag.csv") # reload unscaled data
desc <- desc[desc$year < 2015,]
desc <- desc[,c(5,12,29,32,51,52,53,42,54,55)]
stargazer(desc, omit.summary.stat = c("p25", "p75"), digits = 2, style = "ajps",
          covariate.labels = covs_desc) # Table A11

####################### Text analysis ########
##->## Appending text data 
## These data were created by using the Google translate API
## on the manifesto data corpus. This code is available upon request
t1 <- read.csv("Translations/bulgaria.csv")
t2 <- read.csv("Translations/english.csv")
t3 <- read.csv("Translations/hebrew_thru_latvian.csv")
t4 <- read.csv("Translations/macedonian_thru_ukranian.csv")
t5 <- read.csv("Translations/spanish_thru_italian.csv")
text <- rbind(t1, t2, t3, t4, t5)

##->## Cleaning and preprocessing
Corpus <- Corpus(VectorSource(text$translatedText)) # convert to corpus
Corpus.clean <- tm_map(Corpus, stripWhitespace) # strip white space
Corpus.clean <- tm_map(Corpus.clean, removePunctuation) # remove punctuation
Corpus.clean <- tm_map(Corpus.clean, tolower) # convert to lower case
Corpus.clean <- tm_map(Corpus.clean, removeWords, stopwords("english"))
dataframe <- data.frame(text=unlist(sapply(Corpus.clean, `[`)), stringsAsFactors=F) # convert to frame

text <- cbind(text, dataframe) # re-append text to frame
text <- text[-c(1:3,7)]
text$year <- as.numeric(substr(text$date, 1, 4))
text <- text[-c(3)]

load("cmp_parlgov_vdem_combined.RData") # cmp metadata + parlgov + vdem ruling party info
cmp_parlgov_vdem$year <- as.numeric(cmp_parlgov_vdem$year)
# vdem_govt_support:does party support government formed after election?
# 0 = yes as a senior partner; HoG belongs to this party
# 1 = yes as a junior partner: HoG does not belong to this party, but 1+ cabinet members do
# 2 = yes but party not officially represented in govt
# 3 = no party is in opposition
# 4 = NA; no govt took office this election (yet)

textp <- left_join(unique(text), unique(cmp_parlgov_vdem), by = c("party", "year"))

# create subsets of the data to use in model to try to just include manifestos of ruling parties, etc (3 versions)
text_vdem <- subset(textp, vdem_govt_support == 0 | 
                      vdem_govt_support == 1 | 
                      vdem_govt_support == 2)

text_prime_minister <- subset(textp, pgv_prime_minister == 1)
text_prime_minister <- text_prime_minister[-c(5,7)]
colnames(text_prime_minister)[5] <- "ccode"
text_cabinet <- subset(textp, pgv_cabinet_party == 1)
text_cabinet <- text_cabinet[-c(5,7)]
colnames(text_cabinet)[5] <- "ccode"

imfsub <- read.csv("prelimlagimp.csv") # load data on IMF programs
conditions <- left_join(unique(text_cabinet), unique(imfsub), by = c("ccode", "year")) # merge IMF data into text data
conditions <- conditions[!is.na(conditions$gdppc),]
processed <- textProcessor(conditions$text, metadata = conditions) # process text data
out <- prepDocuments(processed$documents, processed$vocab, processed$meta) # build data
processed$meta$count[is.na(processed$meta$count)] <- 0

# Determine number of topics
processed2 <- processed[-processed$docs.removed]
tmulti <- manyTopics(processed2$documents, processed2$vocab, 
                     K = 2:20, prevalence = ~ count, max.em.its = 10, 
                     runs = 5, data = processed2$meta, seed = 1234)
t <- tmulti$semcoh
s <- tmulti$exclusivity
sc <- c()
ex <- c()
cols <- c()
for(i in 1:19){
  sc <- c(sc, mean(t[[i]]))
  ex <- c(ex, mean(s[[i]]))
  cols <- c(2:20)
}
pex <- data.frame(cbind(sc, ex, cols))
pex$cols <- as.factor(pex$cols)
ggplot(pex, aes(x = sc, y = ex, colour = cols)) +
  geom_point() # 10 looks good; Figure A1

# Run stm
imf.model <- selectModel(processed2$documents, processed2$vocab, K=10, 
                         prevalence = ~ count + gdppc + trade + 
                           population_total + cinc + major_power +
                           polity2 + exec_yrs_in_office + ethnic_frac +
                           factor(ccode) + factor(year), max.em.its = 10,
                         runs=2, data = processed2$meta, seed = 1234) # run model with 10 topics
imf.model1 <- imf.model$runout[[1]] # select 1
labelTopics(imf.model1) # see most common words for labeling
imf.model1.effect <-  estimateEffect(1:10 ~ count + gdppc + trade + 
                                       population_total + cinc + major_power +
                                       polity2 + exec_yrs_in_office + ethnic_frac +
                                       factor(ccode) + factor(year), 
                                     imf.model1, meta=processed2$meta, uncertainty = "Global")
summary(imf.model1.effect) # examine which topics are significant

texreg(imf.model1.effect,
       omit.coef = "factor") # Table A2; NOTE that the exact coefficients and errors may vary slightly from run to run but the substantive interpretation and significance should match

plot(imf.model1.effect, covariate = "count", topics = c(1:10), 
     method = "difference", cov.value1=1, cov.value2=0, ci.level = 0.9, 
     xlab = "Change in topical prevalence when adding condition", 
     main = "", xlim = c(-.002,.002), labeltype = "custom", 
     custom.labels = c('Government programs', 'Economic growth', 
                       'Industrial policy', 'Public spending', 
                       'Energy sector', 'Law and courts',
                       'Tax policy', 'Legislative plans',  
                       'International relations', 'Physical infrastructure'),
     cex.lab=1.5, cex.axis=1.5) # Figure 1

oo_1 <- findThoughts(imf.model1, texts=processed$meta$text, topics=1, n=10)
oo_1[["docs"]][["Topic 1"]][3]
oo_1[["docs"]][["Topic 1"]][4] # Government programs; rep responses in Table A1 (we went back to main text sheet and pulled out sentences before pre-processing for readability)

oo_2 <- findThoughts(imf.model1, texts=processed$meta$text, topics=2, n=50)
oo_2[["docs"]][["Topic 2"]][40]
oo_2[["docs"]][["Topic 2"]][44]
oo_2[["docs"]][["Topic 2"]][49]
oo_2[["docs"]][["Topic 2"]][17] # Economic growth*

oo_3 <- findThoughts(imf.model1, texts=processed$meta$text, topics=3, n=50)
oo_3[["docs"]][["Topic 3"]][25]
oo_3[["docs"]][["Topic 3"]][7] # Industrial policy

oo_4 <- findThoughts(imf.model1, texts=processed$meta$text, topics=4, n=50)
oo_4[["docs"]][["Topic 4"]][8]
oo_4[["docs"]][["Topic 4"]][48]
oo_4[["docs"]][["Topic 4"]][44]
oo_4[["docs"]][["Topic 4"]][35]
oo_4[["docs"]][["Topic 4"]][11] # Public spending*

oo_5 <- findThoughts(imf.model1, texts=processed$meta$text, topics=5, n=100)
oo_5[["docs"]][["Topic 5"]][100]
oo_5[["docs"]][["Topic 5"]][96] # Energy sector

oo_6 <- findThoughts(imf.model1, texts=processed$meta$text, topics=6, n=50)
oo_6[["docs"]][["Topic 6"]][3]
oo_6[["docs"]][["Topic 6"]][43] # Laws and courts

oo_7 <- findThoughts(imf.model1, texts=processed$meta$text, topics=7, n=100)
oo_7[["docs"]][["Topic 7"]][70]
oo_7[["docs"]][["Topic 7"]][88] # Tax policy

oo_8 <- findThoughts(imf.model1, texts=processed$meta$text, topics=8, n=100)
oo_8[["docs"]][["Topic 8"]][100]
oo_8[["docs"]][["Topic 8"]][98] # Legislative plans

oo_9 <- findThoughts(imf.model1, texts=processed$meta$text, topics=9, n=100)
oo_9[["docs"]][["Topic 9"]][73]
oo_9[["docs"]][["Topic 9"]][48]
oo_9[["docs"]][["Topic 9"]][43]
oo_9[["docs"]][["Topic 9"]][73] 
oo_9[["docs"]][["Topic 9"]][93] 
list.search(oo_9$docs, .[grepl("econ",.)], "character")
list.search(oo_9$docs, .[grepl("mili",.)], "character") 
list.search(oo_9$docs, .[grepl("secur",.)], "character") # International relations*

oo_10 <- findThoughts(imf.model1, texts=processed$meta$text, topics=10, n=100)
oo_10[["docs"]][["Topic 10"]][95] 
oo_10[["docs"]][["Topic 10"]][36] # Physical infrastructure

plot(imf.model1, type="labels", n = 10, topics=c(1:10), 
     topic.names = c('Government programs', 'Economic growth', 
                     'Industrial policy', 'Public spending',
                     'Energy sector', 'Law and courts',
                     'Tax policy', 'Legislative plans',
                     'International relations', 'Physical infrastructure')) # plot key words from conditions Figure A2

## Lag model repeating steps from above with lagged IV
conditions <- left_join(unique(text_cabinet), unique(imfsub), by = c("ccode", "year")) # merge IMF data into text data
conditions <- conditions[!is.na(conditions$gdppc),]
conditions <- conditions %>% 
  group_by(ccode, year) %>%
  mutate(lag1_count = lag(count, n=1))
processed <- textProcessor(conditions$text, metadata = conditions) # process text data
out <- prepDocuments(processed$documents, processed$vocab, processed$meta) # build data
processed$meta$lag1_count[is.na(processed$meta$lag1_count)] <- 0
processed2 <- processed[-processed$docs.removed]

imf.modell <- selectModel(processed2$documents, processed2$vocab, K=10, 
                         prevalence = ~ lag1_count + gdppc + trade + 
                           population_total + cinc + major_power +
                           polity2 + exec_yrs_in_office + ethnic_frac +
                           factor(ccode) + factor(year), max.em.its = 10,
                         runs=2, data = processed2$meta, seed = 1234) # run model with 10 topics
imf.modell1 <- imf.modell$runout[[1]] # select 1
labelTopics(imf.modell1) # see most common words for labeling
imf.modell1.effect <-  estimateEffect(1:10 ~ lag1_count + gdppc + trade + 
                                       population_total + cinc + major_power +
                                       polity2 + exec_yrs_in_office + ethnic_frac +
                                       factor(ccode) + factor(year), 
                                     imf.modell1, meta=processed2$meta, uncertainty = "Global")
summary(imf.modell1.effect) # examine which topics are significant

texreg(imf.modell1.effect,
       omit.coef = "factor") # Table A3; NOTE that the exact coefficients and errors may vary slightly from run to run but the substantive interpretation and significance should match

plot(imf.modell1.effect, covariate = "lag1_count", topics = c(1:10), 
     method = "difference", cov.value1=1, cov.value2=0, ci.level = 0.9, 
     xlab = "Change in topical prevalence when adding condition (lag)", 
     main = "", xlim = c(-.002,.002), labeltype = "custom", 
     custom.labels = c('Government programs', 'Economic growth', 
                       'Industrial policy', 'Public spending',
                       'Energy sector', 'Law and courts',
                       'Tax policy', 'Legislative plans',
                       'International relations', 'Physical infrastructure'),
     cex.lab=1.5, cex.axis=1.5) # Figure A3

oo_1 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=1, n=100)
oo_1[["docs"]][["Topic 1"]][35]  # Physical infrastructure
oo_1[["docs"]][["Topic 1"]][97]
oo_1[["docs"]][["Topic 1"]][98]

oo_2 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=2, n=50)
oo_2[["docs"]][["Topic 2"]][40]
oo_2[["docs"]][["Topic 2"]][44]  # Economic growth

oo_3 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=3, n=50)
oo_3[["docs"]][["Topic 3"]][4]
oo_3[["docs"]][["Topic 3"]][7] # Culture

oo_4 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=4, n=50)
oo_4[["docs"]][["Topic 4"]][8]
oo_4[["docs"]][["Topic 4"]][3] # Public spending

oo_5 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=5, n=100)
oo_5[["docs"]][["Topic 5"]][81]
oo_5[["docs"]][["Topic 5"]][100] # Energy policy

oo_6 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=6, n=50)
oo_6[["docs"]][["Topic 6"]][18]
oo_6[["docs"]][["Topic 6"]][43] # Laws and courts

oo_7 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=7, n=100)
oo_7[["docs"]][["Topic 7"]][71]
oo_7[["docs"]][["Topic 7"]][88] # tax policy

oo_8 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=8, n=100)
oo_8[["docs"]][["Topic 8"]][10]
oo_8[["docs"]][["Topic 8"]][9] #  Legislative plans

oo_9 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=9, n=100)
oo_9[["docs"]][["Topic 9"]][95] # International relations
oo_9[["docs"]][["Topic 9"]][47]

oo_10 <- findThoughts(imf.modell1, texts=processed2$meta$text, topics=10, n=100)
oo_10[["docs"]][["Topic 10"]][3] 
oo_10[["docs"]][["Topic 10"]][10] # Industrial policy

## Placebo for government opposition
text_cabinet <- subset(textp, pgv_cabinet_party == 0) # subset to opposition parties
text_cabinet <- text_cabinet[-c(5,7)]
colnames(text_cabinet)[5] <- "ccode"

conditions2 <- left_join(unique(text_cabinet), unique(imfsub), by = c("ccode", "year")) # merge IMF data into text data
conditions2 <- conditions2[!is.na(conditions2$gdppc),]
processeda <- textProcessor(conditions2$text, metadata = conditions2) # process text data
out <- prepDocuments(processeda$documents, processeda$vocab, processeda$meta) # build data
processeda$meta$count[is.na(processeda$meta$count)] <- 0

# Determine number of topics
processeda <- processeda[-processeda$docs.removed]
tmulti <- manyTopics(processeda$documents, processeda$vocab, 
                     K = 2:20, prevalence = ~ count, max.em.its = 10, 
                     runs = 5, data = processeda$meta, seed = 1234)
t <- tmulti$semcoh
s <- tmulti$exclusivity
sc <- c()
ex <- c()
cols <- c()
for(i in 1:19){
  sc <- c(sc, mean(t[[i]]))
  ex <- c(ex, mean(s[[i]]))
  cols <- c(2:20)
}
pex <- data.frame(cbind(sc, ex, cols))
pex$cols <- as.factor(pex$cols)
ggplot(pex, aes(x = sc, y = ex, colour = cols)) +
  geom_point() # 8 looks good; Figure A4

# Run stm
imf.model <- selectModel(processeda$documents, processeda$vocab, K=8, 
                         prevalence = ~ count + gdppc + trade + 
                           population_total + cinc + major_power +
                           polity2 + exec_yrs_in_office + ethnic_frac +
                           factor(ccode) + factor(year), max.em.its = 10,
                         runs=2, data = processeda$meta, seed = 1234) # run model with 8 topics
imf.model1 <- imf.model$runout[[1]] # select 1
labelTopics(imf.model1) # see most common words for labeling
imf.model1.effect <-  estimateEffect(1:8 ~ count + gdppc + trade + 
                                       population_total + cinc + major_power +
                                       polity2 + exec_yrs_in_office + ethnic_frac +
                                       factor(ccode) + factor(year), 
                                     imf.model1, meta=processeda$meta, uncertainty = "Global")
summary(imf.model1.effect) # examine which topics are significant

texreg(imf.model1.effect,
       omit.coef = "factor") # Table A5; NOTE that the exact coefficients and errors may vary slightly from run to run but the substantive interpretation and significance should match

plot(imf.model1.effect, covariate = "count", topics = c(1:8), 
     method = "difference", cov.value1=1, cov.value2=0, ci.level = 0.9, 
     xlab = "Change in topical prevalence when adding condition", 
     main = "", xlim = c(-.002,.002), labeltype = "custom", 
     custom.labels = c('Tax policy', 'Civil liberties', 'Education', 'Financial sector', 
                       'Healthcare', 'Economic growth', 'Physical infrastructure', 'Industrial policy'),
     cex.lab=1.5, cex.axis=1.5) # Figure 2

oo_1 <- findThoughts(imf.model1, texts=processeda$meta$text, topics=1, n=20)
oo_1[["docs"]][["Topic 1"]][3]
oo_1[["docs"]][["Topic 1"]][19] # Tax policy; rep responses appear in Table A4

oo_2 <- findThoughts(imf.model1, texts=processeda$meta$text, topics=2, n=100)
oo_2[["docs"]][["Topic 2"]][30]
oo_2[["docs"]][["Topic 2"]][45] # Non-intervention

oo_3 <- findThoughts(imf.model1, texts=processeda$meta$text, topics=3, n=100)
oo_3[["docs"]][["Topic 3"]][82]
oo_3[["docs"]][["Topic 3"]][94] # Education

oo_4 <- findThoughts(imf.model1, texts=processeda$meta$text, topics=4, n=10)
oo_4[["docs"]][["Topic 4"]][5]
oo_4[["docs"]][["Topic 4"]][9] # Financial policy

oo_5 <- findThoughts(imf.model1, texts=processeda$meta$text, topics=5, n=10)
oo_5[["docs"]][["Topic 5"]][2]
oo_5[["docs"]][["Topic 5"]][9] # Healthcare

oo_6 <- findThoughts(imf.model1, texts=processeda$meta$text, topics=6, n=10)
oo_6[["docs"]][["Topic 6"]][7]
oo_6[["docs"]][["Topic 6"]][6] # Economic growth

oo_7 <- findThoughts(imf.model1, texts=processeda$meta$text, topics=7, n=10)
oo_7[["docs"]][["Topic 7"]][7]
oo_7[["docs"]][["Topic 7"]][6] # Physical infrastructure

oo_8 <- findThoughts(imf.model1, texts=processeda$meta$text, topics=8, n=100)
oo_8[["docs"]][["Topic 8"]][87] # Industrial policy

plot(imf.model1, type="labels", n = 8, topics=c(1:8), 
     topic.names = c('Tax policy', 'Civil liberties', 'Education', 'Financial sector', 
                     'Healthcare', 'Economic growth', 'Physical infrastructure', 'Industrial policy')) # plot key words from conditions (Figure A5)

####################### Sensitivity analysis ####
reg1l <- lm(mids ~ count + gdppc + trade + population_total + cinc + major_power +
              polity2 + exec_yrs_in_office + ethnic_frac + absidealimportantdiff +
              factor(ccode) + factor(year), data = prelimimplag)
(sereg1l <- coeftest(reg1l, vcov = vcovHC(reg1l, type = "HC0", cluster = prelimimplag$ccode))) 
# I use lm here because sensemakr only takes linear regression inputs. Results are similar to the neg bin. I include
# my focal treatment variable (count) along with the full cohort of controls in line with the recommendations of the 
# package materials, and I test sensitivity for both DVs at the 0.1 significance level.

covs_sens <- list(name = c("gdppc", "trade", "population_total", "cinc", "major_power", 
                  "polity2", "exec_yrs_in_office", "ethnic_frac", "absidealimportantdiff"))

sens <- sensemakr(model = reg1l, 
                  treatment = "count",
                  benchmark_covariates = covs_sens,
                  alpha = 0.05,
                  kd = 1:3)

summary(sens)
ovb_minimal_reporting(sens, format = "latex") # Table A21 in appendix

sink()
